###############################################################
### Replication Code: STM Analysis                          ###
### Title: Can Conservatives Be Persuaded to Support UBI?   ###
### Author: Eddy S. F. Yeung                                ###
### Version: September 6, 2022                              ###
###############################################################

# Set-up ----
## Clean the R environment and set the working directory
rm(list = ls())
setwd("~/Desktop/UBI/PB_replication") # change to your own directory

## Load the required packages
library(tidyverse) # version 1.3.1
library(stm)       # version 1.3.6
library(tm)        # version 0.7-8
library(stringi)   # version 1.7.6
library(extrafont) # version 0.17

## Import the cleaned dataset
load("cleaned.RData")
df$text <- ifelse(df$support >= 4 & df$support <= 6, df$open.ended.favor,
                  ifelse(df$support == 3, df$open.ended.neutral,
                  ifelse(df$support >= 0 & df$support <= 2, df$open.ended.oppose, NA)))
df$group <- as.factor(df$group)

# Process the text data ----
## Convert curly apostrophes to straight apostrophes
df$text <- gsub("[\u2018\u2019\u201A\u201B\u2032\u2035]", "'", df$text)

## Standard processing using stm
processed <- textProcessor(df$text, metadata = df)
out <- prepDocuments(processed$documents, processed$vocab, processed$meta)
docs <- out$documents
vocab <- out$vocab
meta <- out$meta

# Analysis ----
## Run the STM (assuming K = 4, i.e., estimating a 4-topic STM) ----
selectedmodel <- 
  stm(out$documents, out$vocab, K = 4,
      prevalence =~ treatment1 + treatment2 + cons.score + 
        treatment1 * cons.score + treatment2 * cons.score,
      data = out$meta, 
      init.type = "Spectral", seed = 1234567)

## Table A4: Top Words and Theme for Each Topic (column 2) ----
labelTopics(selectedmodel)

## Figure A8: Word Cloud of Topic 1 (Policy Analysis) ----
pdf("fig_cloud1.pdf", height = 4, width = 4)
par(family = "Times", ps = 12)
set.seed(1234567)
cloud(selectedmodel, topic = 1)
dev.off()

## Figure A9: Word Cloud of Topic 2 (Helping the Needy) ----
pdf("fig_cloud2.pdf", height = 4, width = 4)
par(family = "Times", ps = 12)
set.seed(1234567)
cloud(selectedmodel, topic = 2)
dev.off()

## Figure A10: Word Cloud of Topic 3 (Poverty and Inequality) ----
pdf("fig_cloud3.pdf", height = 4, width = 4)
par(family = "Times", ps = 12)
set.seed(1234567)
cloud(selectedmodel, topic = 3)
dev.off()

## Figure A11: Word Cloud of Topic 4 (Discouraging Work) ----
pdf("fig_cloud4.pdf", height = 4, width = 4)
par(family = "Times", ps = 12)
set.seed(1234567)
cloud(selectedmodel, topic = 4)
dev.off()

## Figure A12: Representative Responses of Topic 1 (Policy Analysis) ----
thoughts1 <- findThoughts(selectedmodel, texts = meta$text, n = 7, topics = 1)$docs[[1]]
pdf("fig_rep-responses1.pdf", height = 8, width = 12)
par(family = "Times", ps = 15)
plotQuote(thoughts1, width = 100, main = "", family = "Times")
dev.off()

## Figure A13: Representative Responses of Topic 2 (Helping the Needy) ----
thoughts2 <- findThoughts(selectedmodel, texts = meta$text, n = 7, topics = 2)$docs[[1]]
pdf("fig_rep-responses2.pdf", height = 8, width = 12)
par(family = "Times", ps = 15)
plotQuote(thoughts2, width = 100, main = "", family = "Times")
dev.off()

## Figure A14: Representative Responses of Topic 3 (Poverty and Inequality) ----
thoughts3 <- findThoughts(selectedmodel, texts = meta$text, n = 7, topics = 3)$docs[[1]]
pdf("fig_rep-responses3.pdf", height = 8, width = 12)
par(family = "Times", ps = 15)
plotQuote(thoughts3, width = 100, main = "", family = "Times")
dev.off()

## Figure A15: Representative Responses of Topic 4 (Discouraging Work) ----
thoughts4 <- findThoughts(selectedmodel, texts = meta$text, n = 7, topics = 4)$docs[[1]]
pdf("fig_rep-responses4.pdf", height = 8, width = 12)
par(family = "Times", ps = 15)
plotQuote(thoughts4, width = 100, main = "", family = "Times")
dev.off()

## Figure 6: Expected Topic Proportions across Experimental Groups and Their Relationship with Political Ideology ----
### Create an empty PDF file to store the graph
pdf("fig_stm.pdf", height = 9, width = 9)
par(mfrow = c(2, 2))

### Plot the heterogeneous treatment effects for Topic 1
set.seed(1234567)
prep <- estimateEffect(c(1) ~ group * cons.score, 
                       selectedmodel,
                       metadata = out$meta,
                       uncertainty = "Global")
par(family = "Times", ps = 14)
plot.estimateEffect(prep,
                    covariate = "cons.score",
                    model = selectedmodel,
                    method = "continuous",
                    xlab = "Conservatism Score",
                    moderator = "group",
                    moderator.value = 0,
                    linecol = "black",
                    ylim = c(0, 1),
                    printlegend = F,
                    main = "Topic 1 (Policy Analysis)")
plot.estimateEffect(prep,
                    covariate = "cons.score",
                    model = selectedmodel,
                    method = "continuous",
                    moderator = "group",
                    moderator.value = 1,
                    linecol = "blue",
                    ylim = c(0, 1),
                    add = T,
                    printlegend = F)
plot.estimateEffect(prep,
                    covariate = "cons.score",
                    model = selectedmodel,
                    method = "continuous",
                    moderator = "group",
                    moderator.value = 2,
                    linecol = "red",
                    ylim = c(0, 1),
                    add = T,
                    printlegend = F)
legend("topright", c("Control Group", "Treatment Group 1", "Treatment Group 2"), 
       lwd = 2, col = c("black", "blue", "red"))

### Plot the heterogeneous treatment effects for Topic 2
set.seed(1234567)
prep <- estimateEffect(c(2) ~ group * cons.score, 
                       selectedmodel,
                       metadata = out$meta,
                       uncertainty = "Global")
par(family = "Times", ps = 14)
plot.estimateEffect(prep,
                    covariate = "cons.score",
                    model = selectedmodel,
                    method = "continuous",
                    xlab = "Conservatism Score",
                    moderator = "group",
                    moderator.value = 0,
                    linecol = "black",
                    ylim = c(0, 1),
                    printlegend = F,
                    main = "Topic 2 (Helping the Needy)")
plot.estimateEffect(prep,
                    covariate = "cons.score",
                    model = selectedmodel,
                    method = "continuous",
                    moderator = "group",
                    moderator.value = 1,
                    linecol = "blue",
                    ylim = c(0, 1),
                    add = T,
                    printlegend = F)
plot.estimateEffect(prep,
                    covariate = "cons.score",
                    model = selectedmodel,
                    method = "continuous",
                    moderator = "group",
                    moderator.value = 2,
                    linecol = "red",
                    ylim = c(0, 1),
                    add = T,
                    printlegend = F)
legend("topright", c("Control Group", "Treatment Group 1", "Treatment Group 2"), 
       lwd = 2, col = c("black", "blue", "red"))

### Plot the heterogeneous treatment effects for Topic 3
set.seed(1234567)
prep <- estimateEffect(c(3) ~ group * cons.score, 
                       selectedmodel,
                       metadata = out$meta,
                       uncertainty = "Global")
par(family = "Times", ps = 14)
plot.estimateEffect(prep,
                    covariate = "cons.score",
                    model = selectedmodel,
                    method = "continuous",
                    xlab = "Conservatism Score",
                    moderator = "group",
                    moderator.value = 0,
                    linecol = "black",
                    ylim = c(0, 1),
                    printlegend = F,
                    main = "Topic 3 (Poverty and Inequality)")
plot.estimateEffect(prep,
                    covariate = "cons.score",
                    model = selectedmodel,
                    method = "continuous",
                    moderator = "group",
                    moderator.value = 1,
                    linecol = "blue",
                    ylim = c(0, 1),
                    add = T,
                    printlegend = F)
plot.estimateEffect(prep,
                    covariate = "cons.score",
                    model = selectedmodel,
                    method = "continuous",
                    moderator = "group",
                    moderator.value = 2,
                    linecol = "red",
                    ylim = c(0, 1),
                    add = T,
                    printlegend = F)
legend("topright", c("Control Group", "Treatment Group 1", "Treatment Group 2"), 
       lwd = 2, col = c("black", "blue", "red"))

### Plot the heterogeneous treatment effects for Topic 4
set.seed(1234567)
prep <- estimateEffect(c(4) ~ group * cons.score, 
                       selectedmodel,
                       metadata = out$meta,
                       uncertainty = "Global")
par(family = "Times", ps = 14)
plot.estimateEffect(prep,
                    covariate = "cons.score",
                    model = selectedmodel,
                    method = "continuous",
                    xlab = "Conservatism Score",
                    moderator = "group",
                    moderator.value = 0,
                    linecol = "black",
                    ylim = c(0, 1),
                    printlegend = F,
                    main = "Topic 4 (Discouraging Work)")
plot.estimateEffect(prep,
                    covariate = "cons.score",
                    model = selectedmodel,
                    method = "continuous",
                    moderator = "group",
                    moderator.value = 1,
                    linecol = "blue",
                    ylim = c(0, 1),
                    add = T,
                    printlegend = F)
plot.estimateEffect(prep,
                    covariate = "cons.score",
                    model = selectedmodel,
                    method = "continuous",
                    moderator = "group",
                    moderator.value = 2,
                    linecol = "red",
                    ylim = c(0, 1),
                    add = T,
                    printlegend = F)
legend("topright", c("Control Group", "Treatment Group 1", "Treatment Group 2"), 
       lwd = 2, col = c("black", "blue", "red"))
dev.off()